2023/12/23 · 1031字符

数据爬取

requests

import requests

# Fetch the page. The response is bound to `response` rather than `str` so the
# builtin `str` type is not shadowed, and an explicit timeout keeps the script
# from waiting forever if the server never answers.
response = requests.get('http://bozai.tech/', timeout=10)

# Decode the body as UTF-8 when reading response.text.
response.encoding = 'utf-8'
print(response.text)
# Other things that can be read from the response object:
# print(response.status_code)   # --> 200, the HTTP status code
# print(response.encoding)      # --> utf-8, the character encoding used for .text
# print(response.content)       # the raw HTTP response body as bytes
# print(response.json())        # the body parsed as JSON (json is a method, so call it)
# response.raise_for_status()   # raises requests.HTTPError for 4xx/5xx responses

模拟浏览器

import requests

# Request headers carrying a desktop Chrome User-Agent, so the request
# identifies itself as a browser instead of the default python-requests client.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36',
}

# Bound to `response` rather than `str` so the builtin `str` type is not
# shadowed; the timeout prevents the call from blocking indefinitely.
response = requests.get('http://bozai.tech/', headers=headers, timeout=10)

# Decode the body as UTF-8 when reading response.text.
response.encoding = 'utf-8'
print(response.text)

标签检索

import requests
from bs4 import BeautifulSoup

# Download the page; the timeout prevents the call from blocking indefinitely.
response = requests.get('http://jzfyjnkj.com/', timeout=10)

# Decode the body as UTF-8 when reading response.text.
response.encoding = 'utf-8'
# response.text is already the decoded HTML string, so it is handed to the
# parser directly (it is not a file path and must not be passed to open()).
html = response.text

# Parse the markup with the html5lib parser (the html5lib package must be installed).
soup = BeautifulSoup(html, features="html5lib")

# Print every <img> tag found inside <body>.
print(soup.body.find_all('img'))